#include <cuda_runtime.h> 
#include <device_launch_parameters.h>
#include <stdio.h>

/**
 * Kernel prototype; the definition lives in another translation unit.
 *
 * Judging by the name, this fills d_anchors with the starting cluster
 * centres. d_anchors is the only pointer parameter not declared const, so
 * it is the only buffer this kernel is declared able to write through.
 *
 * NOTE(review): the row_ptr / cols_ind / vals triple looks like a CSR sparse
 * matrix, and the d_ prefix suggests device pointers — confirm against the
 * definition and the host-side caller.
 * NOTE(review): the expected grid/block layout and the size/layout of
 * d_anchors are not visible from this declaration.
 */
__global__ void init_cluster_centers(
    float *d_anchors,            // output buffer (non-const)
    const int num_of_clusters,   // presumably the number of centres to produce
    const int *d_data_row_ptr,   // presumably CSR row offsets — TODO confirm
    const int *d_data_cols_ind,  // presumably CSR column indices — TODO confirm
    const float *d_data_vals,    // presumably CSR non-zero values — TODO confirm
    const int n,                 // presumably the number of data rows — TODO confirm
    const int d);                // presumably the feature dimension — TODO confirm

/**
 * Kernel prototype; the definition lives in another translation unit.
 *
 * Judging by the name, this computes squared Euclidean distances involving
 * the centres in d_anchors and stores them in d_euclid_squared_dist_2, which
 * is the only pointer parameter not declared const.
 *
 * NOTE(review): d_vals / d_row_ptr / d_cols_ind look like a CSR sparse
 * matrix, and the d_ prefix suggests device pointers — confirm against the
 * definition and the host-side caller.
 * NOTE(review): the layout of the distance buffer (e.g. point-major vs
 * cluster-major) and the expected grid/block layout are not visible here.
 */
__global__ void get_euclid_square_dist_kernel(
    float *d_euclid_squared_dist_2,  // output buffer (non-const)
    const int num_of_clusters,       // presumably the number of centres
    const int n,                     // presumably the number of data rows — TODO confirm
    const float *d_anchors,          // read-only; presumably the current centres
    const float *d_vals,             // presumably CSR non-zero values — TODO confirm
    const int *d_row_ptr,            // presumably CSR row offsets — TODO confirm
    const int *d_cols_ind,           // presumably CSR column indices — TODO confirm
    const int d);                    // presumably the feature dimension — TODO confirm

/**
 * Kernel prototype; the definition lives in another translation unit.
 *
 * Judging by the name, this picks the closest cluster for each data item
 * from the distances in d_euclid_squared_dist_2 and records the choice in
 * d_membership, which is the only pointer parameter not declared const.
 *
 * NOTE(review): the d_ prefix suggests device pointers; the layout of the
 * distance buffer, the encoding of membership values, and the expected
 * grid/block layout are not visible from this declaration — confirm against
 * the definition.
 */
__global__ void find_nearest_cluster(
    const float *d_euclid_squared_dist_2,  // read-only distance buffer
    int *d_membership,                     // output buffer (non-const)
    const int n,                           // presumably the number of data rows — TODO confirm
    const int num_of_clusters);            // presumably the number of centres

/**
 * Kernel prototype; the definition lives in another translation unit.
 *
 * Judging by the name, this recomputes the cluster centres in d_anchors
 * from the data and the assignments in d_membership. d_anchors is the only
 * pointer parameter not declared const, so it is the only buffer this kernel
 * is declared able to write through.
 *
 * NOTE(review): the row_ptr / cols_ind / vals triple looks like a CSR sparse
 * matrix, and the d_ prefix suggests device pointers — confirm against the
 * definition and the host-side caller.
 * NOTE(review): whether d_anchors must be zeroed beforehand, and the expected
 * grid/block layout, are not visible from this declaration.
 */
__global__ void update_cluster(
    float *d_anchors,            // output buffer (non-const)
    const int *d_data_row_ptr,   // presumably CSR row offsets — TODO confirm
    const int *d_data_cols_ind,  // presumably CSR column indices — TODO confirm
    const float *d_data_vals,    // presumably CSR non-zero values — TODO confirm
    const int n,                 // presumably the number of data rows — TODO confirm
    const int num_of_clusters,   // presumably the number of centres
    const int d,                 // presumably the feature dimension — TODO confirm
    const int *d_membership);    // read-only; presumably per-row cluster assignment
